#! /usr/bin/env python

import HTMLParser

class URLParser(HTMLParser.HTMLParser):
	"""HTML parser that collects the distinct ``href`` values of ``<a>`` tags.

	After markup has been fed to the parser, ``urls`` holds every link target
	exactly once, in the order in which it was first seen.
	"""

	def __init__(self):
		HTMLParser.HTMLParser.__init__(self)
		# Unique href values, kept in first-seen order.
		self.urls = []

	def handle_starttag(self, tag, attributes):
		"""Record the ``href`` attribute of each anchor start tag.

		tag        -- name of the tag being opened
		attributes -- iterable of ``(name, value)`` pairs for that tag
		"""
		if tag != 'a':
			return
		for name, value in attributes:
			if name != 'href' or value is None:
				# A bare ``href`` without a value is reported as None;
				# it is not a usable link, so it is not recorded.
				continue
			if value not in self.urls:
				self.urls.append(value)

def URLHost(url):
	"""Return the host part of ``url`` (everything before the first path slash).

	Any ``scheme://`` prefix is removed first, so ``https://`` and other
	schemes are handled as well as ``http://``. A port, if present, is kept
	as part of the returned host. Without a scheme prefix the text up to the
	first ``/`` is returned.
	"""
	scheme, separator, remainder = url.partition('://')
	if not separator or any(ch in scheme for ch in '/?#'):
		# Either there is no "://" at all, or it occurs later inside the
		# path/query rather than as a leading scheme: nothing to strip.
		remainder = url
	return remainder.partition('/')[0]

def URLPath(url):
	"""Return the path part of ``url``, always starting with ``/``.

	Any ``scheme://`` prefix is removed first, so ``https://`` and other
	schemes are handled as well as ``http://``. Everything after the first
	``/`` that follows the host is returned (query string included); a URL
	with no path yields ``"/"``.
	"""
	scheme, separator, remainder = url.partition('://')
	if not separator or any(ch in scheme for ch in '/?#'):
		# Either there is no "://" at all, or it occurs later inside the
		# path/query rather than as a leading scheme: nothing to strip.
		remainder = url
	return "/" + remainder.partition('/')[2]

import re

# Matches one tag, from "<" up to the nearest ">". The negated class also
# matches newlines, so a tag that wraps across lines is still recognised.
# Compiled once at import time instead of on every call.
_HTML_TAG_PATTERN = re.compile(r'<[^>]*>')

def RemoveHtmlTags(contents):
	"""Return ``contents`` with every ``<...>`` tag removed.

	Only the tags themselves are deleted; the text between them is left
	untouched. Matching is purely textual, from each ``<`` to the next ``>``.
	"""
	return _HTML_TAG_PATTERN.sub('', contents)
